This analysis implements a statistically rigorous three-dimensional approach to measuring international status:
We employ Principal Component Analysis (PCA) for each dimension and then integrate them into a comprehensive status measure using a second-level PCA.
# Load the integrated dataset
integrated_data <- read.csv("/Users/yutianyi/Desktop/MA thesis data creation/integrated_dataset_with_igo.csv")
# Load the network PCA scores
network_pca <- read.csv("/Users/yutianyi/Desktop/MA thesis data creation/all_network_status_pca.csv")
# Examine structure
str(integrated_data)## 'data.frame': 8276 obs. of 46 variables:
## $ Destination : chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ Year : int 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 ...
## $ recognition_count : int 16 18 18 13 14 14 14 14 22 22 ...
## $ avg_embassy_level : num 5.31 5.22 5.94 1 1 ...
## $ avg_lor : num 0.906 0.889 0.931 0.75 0.75 ...
## $ total_potential_senders : int 94 114 116 118 126 127 128 133 134 137 ...
## $ recognition_percentage : num 17 15.8 15.5 11 11.1 ...
## $ ccode : int 700 700 700 700 700 700 700 700 700 700 ...
## $ milex : int 14240 14184 13125 14795 14544 13865 16819 17932 20067 20915 ...
## $ milper : int 60 110 104 110 112 97 97 95 89 89 ...
## $ irst : int 0 0 0 0 0 0 0 0 0 0 ...
## $ pec : int 48 66 112 99 113 144 162 331 405 181 ...
## $ tpop : int 10775 11014 11262 11521 11791 12071 12358 12652 12957 13280 ...
## $ upop : int 444 462 481 500 478 456 434 412 448 483 ...
## $ cinc : num 0.00131 0.00176 0.00166 0.00166 0.00164 ...
## $ continent : chr "Asia" "Asia" "Asia" "Asia" ...
## $ democ : int 0 0 0 0 0 0 0 0 0 0 ...
## $ autoc : int 10 10 10 10 7 7 7 7 7 7 ...
## $ polity : int -10 -10 -10 -10 -7 -7 -7 -7 -7 -7 ...
## $ polity2 : int -10 -10 -10 -10 -7 -7 -7 -7 -7 -7 ...
## $ durable : int NA NA NA NA 0 1 2 3 4 5 ...
## $ xconst : int 1 1 1 1 3 3 3 3 3 3 ...
## $ full_memberships : int 14 16 17 17 17 18 21 21 22 23 ...
## $ associate_memberships : int 0 0 0 0 0 0 0 0 0 0 ...
## $ observer_statuses : int 0 0 0 0 0 0 0 0 0 0 ...
## $ total_memberships : int 14 16 17 17 17 18 21 21 22 23 ...
## $ prestigious_memberships : int 5 5 5 5 5 5 5 5 5 5 ...
## $ prestigious_ratio : num 0.357 0.312 0.294 0.294 0.294 ...
## $ regional_memberships : int 0 0 0 0 0 0 0 0 0 0 ...
## $ global_memberships : int 14 16 17 17 17 18 21 21 22 23 ...
## $ regional_global_ratio : num 0 0 0 0 0 0 0 0 0 0 ...
## $ security_memberships : int 0 0 0 0 0 0 0 0 0 0 ...
## $ economic_memberships : int 3 3 3 3 3 3 3 3 3 3 ...
## $ political_memberships : int 1 1 1 1 1 1 1 1 1 1 ...
## $ institutional_status : num 0.177 0.0598 0.0788 -0.0407 -0.0308 ...
## $ functional_balance : num 0.118 0.118 0.118 0.118 0.118 ...
## $ cinc_z : num -0.298 -0.259 -0.252 -0.264 -0.254 ...
## $ recognition_z : num -0.771 -0.495 -0.519 -0.924 -0.851 ...
## $ material_institutional_gap : num -0.475 -0.319 -0.331 -0.223 -0.223 ...
## $ recognition_institutional_gap: num -0.948 -0.555 -0.598 -0.883 -0.82 ...
## $ status_inconsistency : num 1.423 0.874 0.929 1.106 1.043 ...
## $ material_quartile : int 2 2 3 3 3 3 3 3 2 3 ...
## $ recognition_quartile : int 1 2 2 1 1 1 1 1 2 2 ...
## $ institutional_quartile : int 2 2 2 2 2 2 2 2 2 2 ...
## $ status_score : num 1.67 2 2.33 2 2 ...
## $ status_type : chr "Middle Status" "Middle Status" "Middle Status" "Middle Status" ...
## 'data.frame': 1167 obs. of 18 variables:
## $ country : chr "Czechoslovakia" "Egypt" "France" "Germany" ...
## $ recognition_count : int 39 58 82 66 55 33 36 73 58 37 ...
## $ weighted_recognition : num 49.6 60.2 75.5 64.4 43.8 ...
## $ eigenvector_centrality : num 0.528 0.71 1 0.882 0.66 ...
## $ pagerank : num 0.0125 0.0196 0.0379 0.0252 0.0205 ...
## $ authority : num 0.547 0.725 1 0.881 0.665 ...
## $ betweenness : num 0.024485 0.000141 0.026078 0.090051 0.042696 ...
## $ recognition_rate : num 0.394 0.586 0.828 0.667 0.556 ...
## $ network_inconsistency : num 0.29 0.862 1.213 1.375 0.54 ...
## $ outgoing_ties : int 59 61 83 68 44 34 32 78 60 34 ...
## $ recognition_balance : int -20 -3 -1 -2 11 -1 4 -5 -2 3 ...
## $ recognition_ratio : num 0.661 0.951 0.988 0.971 1.25 ...
## $ recognition_status_pca : num 0.53 0.734 1 0.819 0.642 ...
## $ prestige_status_pca : num 0.44 0.626 1 0.786 0.604 ...
## $ brokerage_status_pca : num 0.388 0.321 0.636 1 0.605 ...
## $ overall_status_network_pca: num 0.503 0.643 1 0.915 0.656 ...
## $ year : int 1960 1960 1960 1960 1960 1960 1960 1960 1960 1960 ...
## $ external_internal_ratio : num 0.661 0.951 0.988 0.971 1.25 ...
# Check time periods
cat("Integrated dataset year range:", min(integrated_data$Year), "to", max(integrated_data$Year), "\n")## Integrated dataset year range: 1960 to 2016
## Network PCA dataset year range: 1960 to 2020
## Destination Year recognition_count avg_embassy_level avg_lor
## 1 Afghanistan 1960 16 5.312500 0.9062500
## 2 Afghanistan 1961 18 5.222222 0.8888889
## 3 Afghanistan 1962 18 5.944444 0.9305556
## 4 Afghanistan 1963 13 1.000000 0.7500000
## 5 Afghanistan 1964 14 1.000000 0.7500000
## 6 Afghanistan 1965 14 1.000000 0.7500000
## total_potential_senders recognition_percentage ccode milex milper irst pec
## 1 94 17.02128 700 14240 60 0 48
## 2 114 15.78947 700 14184 110 0 66
## 3 116 15.51724 700 13125 104 0 112
## 4 118 11.01695 700 14795 110 0 99
## 5 126 11.11111 700 14544 112 0 113
## 6 127 11.02362 700 13865 97 0 144
## tpop upop cinc continent democ autoc polity polity2 durable xconst
## 1 10775 444 0.001306553 Asia 0 10 -10 -10 NA 1
## 2 11014 462 0.001757446 Asia 0 10 -10 -10 NA 1
## 3 11262 481 0.001660480 Asia 0 10 -10 -10 NA 1
## 4 11521 500 0.001663900 Asia 0 10 -10 -10 NA 1
## 5 11791 478 0.001643951 Asia 0 7 -7 -7 0 3
## 6 12071 456 0.001545076 Asia 0 7 -7 -7 1 3
## full_memberships associate_memberships observer_statuses total_memberships
## 1 14 0 0 14
## 2 16 0 0 16
## 3 17 0 0 17
## 4 17 0 0 17
## 5 17 0 0 17
## 6 18 0 0 18
## prestigious_memberships prestigious_ratio regional_memberships
## 1 5 0.3571429 0
## 2 5 0.3125000 0
## 3 5 0.2941176 0
## 4 5 0.2941176 0
## 5 5 0.2941176 0
## 6 5 0.2777778 0
## global_memberships regional_global_ratio security_memberships
## 1 14 0 0
## 2 16 0 0
## 3 17 0 0
## 4 17 0 0
## 5 17 0 0
## 6 18 0 0
## economic_memberships political_memberships institutional_status
## 1 3 1 0.17698673
## 2 3 1 0.05983547
## 3 3 1 0.07877260
## 4 3 1 -0.04070601
## 5 3 1 -0.03081537
## 6 3 1 -0.09041296
## functional_balance cinc_z recognition_z material_institutional_gap
## 1 0.1178732 -0.2979417 -0.7711535 -0.4749284
## 2 0.1178732 -0.2594530 -0.4950857 -0.3192885
## 3 0.1178732 -0.2519564 -0.5190143 -0.3307290
## 4 0.1178732 -0.2639189 -0.9237552 -0.2232129
## 5 0.1178732 -0.2542935 -0.8505159 -0.2234781
## 6 0.1178732 -0.2561434 -0.8765413 -0.1657305
## recognition_institutional_gap status_inconsistency material_quartile
## 1 -0.9481403 1.4230687 2
## 2 -0.5549211 0.8742096 2
## 3 -0.5977869 0.9285159 3
## 4 -0.8830491 1.1062620 3
## 5 -0.8197006 1.0431787 3
## 6 -0.7861283 0.9518588 3
## recognition_quartile institutional_quartile status_score status_type
## 1 1 2 1.666667 Middle Status
## 2 2 2 2.000000 Middle Status
## 3 2 2 2.333333 Middle Status
## 4 1 2 2.000000 Middle Status
## 5 1 2 2.000000 Middle Status
## 6 1 2 2.000000 Middle Status
## country recognition_count weighted_recognition eigenvector_centrality
## 1 Czechoslovakia 39 49.625 0.5282203
## 2 Egypt 58 60.250 0.7104572
## 3 France 82 75.500 1.0000000
## 4 Germany 66 64.375 0.8823183
## 5 India 55 43.750 0.6602705
## 6 Indonesia 33 31.750 0.4904962
## pagerank authority betweenness recognition_rate network_inconsistency
## 1 0.01252877 0.5467224 0.0244847825 0.3939394 0.2902262
## 2 0.01957898 0.7248757 0.0001410069 0.5858586 0.8616718
## 3 0.03789240 1.0000000 0.0260780360 0.8282828 1.2127113
## 4 0.02517951 0.8811520 0.0900514866 0.6666667 1.3747474
## 5 0.02051752 0.6654690 0.0426958574 0.5555556 0.5401825
## 6 0.01140977 0.5061635 0.0472616721 0.3333333 0.8547527
## outgoing_ties recognition_balance recognition_ratio recognition_status_pca
## 1 59 -20 0.6610169 0.5300235
## 2 61 -3 0.9508197 0.7343870
## 3 83 -1 0.9879518 1.0000000
## 4 68 -2 0.9705882 0.8190107
## 5 44 11 1.2500000 0.6415824
## 6 34 -1 0.9705882 0.4061870
## prestige_status_pca brokerage_status_pca overall_status_network_pca year
## 1 0.4395712 0.3878857 0.5032924 1960
## 2 0.6257682 0.3205850 0.6427105 1960
## 3 1.0000000 0.6361345 1.0000000 1960
## 4 0.7860529 1.0000000 0.9148921 1960
## 5 0.6041636 0.6047510 0.6562076 1960
## 6 0.4038194 0.5157629 0.4653441 1960
## external_internal_ratio
## 1 0.6610169
## 2 0.9508197
## 3 0.9879518
## 4 0.9705882
## 5 1.2500000
## 6 0.9705882
First, we’ll create a PCA-based composite score for attribute status using material capabilities, regime characteristics, and institutional membership.
# Select relevant attribute variables
attributes_data <- integrated_data %>%
select(
# Material capabilities
milex, milper, irst, pec, tpop, upop, cinc,
# Regime characteristics
democ, autoc, durable, xconst,
# Institutional membership
total_memberships, prestigious_memberships, security_memberships,
economic_memberships, political_memberships
)
# Check for missing values
missing_count <- colSums(is.na(attributes_data))
print("Missing values in attribute variables:")## [1] "Missing values in attribute variables:"
## milex milper irst
## 0 0 0
## pec tpop upop
## 0 0 0
## cinc democ autoc
## 0 82 82
## durable xconst total_memberships
## 87 82 398
## prestigious_memberships security_memberships economic_memberships
## 398 398 398
## political_memberships
## 398
# If there are missing values, impute with median
attributes_data_clean <- attributes_data %>%
mutate(across(everything(), ~if_else(is.na(.), median(., na.rm = TRUE), .)))
# Run PCA on attribute data
attributes_pca <- prcomp(attributes_data_clean, scale = TRUE)
# Examine variance explained
attributes_pca_var <- get_eigenvalue(attributes_pca)
head(attributes_pca_var)## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 5.4189764 33.868602 33.86860
## Dim.2 3.3276161 20.797601 54.66620
## Dim.3 2.6148620 16.342888 71.00909
## Dim.4 1.1976585 7.485366 78.49446
## Dim.5 0.8396626 5.247892 83.74235
## Dim.6 0.6290110 3.931319 87.67367
# Variable contributions to principal components
var_contrib <- get_pca_var(attributes_pca)$contrib
head(round(var_contrib[, 1:3], 2))## Dim.1 Dim.2 Dim.3
## milex 8.50 0.03 0.26
## milper 11.19 2.85 0.61
## irst 11.73 1.21 0.01
## pec 15.95 0.68 0.04
## tpop 10.81 2.33 0.30
## upop 13.65 1.67 0.05
# Extract the first principal component as attribute status score
pc_scores <- as.data.frame(attributes_pca$x)
# Create a clean attributes status score
# First, check if PC1 correlates positively with cinc (to ensure higher = more status)
cinc_pc1_cor <- cor(attributes_data_clean$cinc, pc_scores$PC1)
cat("Correlation between CINC and PC1:", cinc_pc1_cor, "\n")## Correlation between CINC and PC1: 0.9239289
# If negative, flip the sign
if(cinc_pc1_cor < 0) {
pc_scores$PC1 <- -pc_scores$PC1
cat("PC1 direction flipped to ensure positive correlation with capabilities\n")
}
# Scale to 0-1 range
attribute_status <- (pc_scores$PC1 - min(pc_scores$PC1)) / (max(pc_scores$PC1) - min(pc_scores$PC1))
# Add back to integrated dataset
integrated_data$attribute_status_pca <- attribute_statusWe’ll use the pre-calculated network PCA scores from the diplomatic recognition analysis.
# Standardize country name format between datasets
# First, check for name discrepancies
integrated_names <- unique(integrated_data$Destination)
network_names <- unique(network_pca$country)
# Create a name mapping function if needed
map_country_names <- function(name) {
mapping <- c(
"United States" = "United States of America",
"UK" = "United Kingdom",
"Russia" = "Russian Federation",
"Vietnam" = "Viet Nam"
# Add more mappings as needed
)
if(name %in% names(mapping)) {
return(mapping[name])
} else {
return(name)
}
}
# Apply name mapping to network data
network_pca$country_mapped <- sapply(network_pca$country, map_country_names)
# Rename country column for clarity
colnames(network_pca)[colnames(network_pca) == "country"] <- "country_original"
# Merge datasets
status_data <- integrated_data %>%
left_join(network_pca, by = c("Destination" = "country_mapped", "Year" = "year"))
# Check how many rows have both attribute and network status
cat("Total rows in merged dataset:", nrow(status_data), "\n")## Total rows in merged dataset: 8276
cat("Rows with both attribute and network status:",
sum(!is.na(status_data$attribute_status_pca) & !is.na(status_data$overall_status_network_pca)), "\n")## Rows with both attribute and network status: 820
# Sample of the merged data
head(status_data %>%
select(Destination, Year, attribute_status_pca, overall_status_network_pca))## Destination Year attribute_status_pca overall_status_network_pca
## 1 Afghanistan 1960 0.04715593 0.2023305
## 2 Afghanistan 1961 0.04890810 NA
## 3 Afghanistan 1962 0.04895709 NA
## 4 Afghanistan 1963 0.04911601 NA
## 5 Afghanistan 1964 0.04684636 NA
## 6 Afghanistan 1965 0.04686371 NA
Now we’ll create multiple temporal features and derive a PCA-based temporal status dimension.
# Step 1: Filter countries with sufficient time points for temporal analysis
countries_with_history <- status_data %>%
filter(!is.na(attribute_status_pca) & !is.na(overall_status_network_pca)) %>%
group_by(Destination) %>%
summarize(
years_available = n_distinct(Year),
.groups = "drop"
) %>%
filter(years_available >= 5) %>% # Need at least 5 years for meaningful temporal analysis
pull(Destination)
cat("Number of countries with sufficient temporal data:", length(countries_with_history), "\n")## Number of countries with sufficient temporal data: 118
# Step 2: Calculate combined status score for temporal analysis
# We'll use a simple average of attribute and network status for now
status_data <- status_data %>%
mutate(
interim_combined_status = (attribute_status_pca + overall_status_network_pca) / 2
)
# Step 3: Extract multiple temporal features
temporal_features <- status_data %>%
filter(Destination %in% countries_with_history) %>%
filter(!is.na(interim_combined_status)) %>%
group_by(Destination) %>%
arrange(Year) %>%
summarize(
# Trend - linear regression slope (trajectory over time)
status_trajectory = coef(lm(interim_combined_status ~ Year))[2],
# Stability - inverse of volatility (standard deviation)
status_volatility = sd(interim_combined_status),
# Persistence - safer approach avoiding rle issues
status_persistence = {
# Create bins manually
bins <- quantile(interim_combined_status, probs = seq(0, 1, 0.25))
# Categorize each value
categories <- findInterval(interim_combined_status, bins)
# Now count consecutive years in same category
runs <- rle(categories)
max(runs$lengths)
},
# Recent change - simplified approach
recent_change = {
n <- length(interim_combined_status)
if (n >= 6) {
mean(tail(interim_combined_status, 3)) - mean(interim_combined_status[(n-5):(n-3)])
} else if (n >= 2) {
tail(interim_combined_status, 1) - interim_combined_status[1]
} else {
0
}
},
# Acceleration - second derivative (change in change)
status_acceleration = if(length(interim_combined_status) >= 3)
sd(diff(interim_combined_status)) else NA,
# Average status level (for reference)
avg_status = mean(interim_combined_status),
# Years of data
years_of_data = n(),
# Year range
min_year = min(Year),
max_year = max(Year),
.groups = "drop"
)
# Handle any NAs in temporal features
temporal_features <- temporal_features %>%
mutate(across(c(status_trajectory, status_volatility, status_persistence,
recent_change, status_acceleration),
~ifelse(is.na(.), median(., na.rm = TRUE), .)))
# Step 4: Run PCA on temporal features
temporal_pca_data <- temporal_features %>%
select(status_trajectory, status_volatility, status_persistence,
recent_change, status_acceleration)
temporal_pca <- prcomp(temporal_pca_data, scale = TRUE)
summary(temporal_pca)## Importance of components:
## PC1 PC2 PC3 PC4 PC5
## Standard deviation 1.7823 0.9811 0.8511 0.34432 0.13428
## Proportion of Variance 0.6353 0.1925 0.1449 0.02371 0.00361
## Cumulative Proportion 0.6353 0.8278 0.9727 0.99639 1.00000
# Check variable contributions
var_contrib_temporal <- get_pca_var(temporal_pca)$contrib
print("Variable contributions to temporal PCA:")## [1] "Variable contributions to temporal PCA:"
## Dim.1 Dim.2
## status_trajectory 28.15 0.70
## status_volatility 28.09 1.56
## status_persistence 4.44 73.06
## recent_change 25.88 0.21
## status_acceleration 13.44 24.48
# Extract the first principal component
temporal_pc_scores <- as.data.frame(temporal_pca$x)
# Check if PC1 correlates positively with trajectory (to ensure higher = more positive trajectory)
traj_pc1_cor <- cor(temporal_features$status_trajectory, temporal_pc_scores$PC1)
cat("Correlation between trajectory and PC1:", traj_pc1_cor, "\n")## Correlation between trajectory and PC1: 0.9455365
# If negative, flip the sign
if(traj_pc1_cor < 0) {
temporal_pc_scores$PC1 <- -temporal_pc_scores$PC1
cat("PC1 direction flipped to ensure positive correlation with trajectory\n")
}
# Scale to 0-1 range
temporal_status_pca <- (temporal_pc_scores$PC1 - min(temporal_pc_scores$PC1)) /
(max(temporal_pc_scores$PC1) - min(temporal_pc_scores$PC1))
# Create data frame with temporal status
temporal_status_df <- data.frame(
Destination = temporal_features$Destination,
temporal_status_pca = temporal_status_pca,
status_trajectory = temporal_features$status_trajectory,
status_volatility = temporal_features$status_volatility,
status_persistence = temporal_features$status_persistence,
years_of_data = temporal_features$years_of_data
)
# Display the top countries by temporal status
kable(temporal_status_df %>%
arrange(desc(temporal_status_pca)) %>%
select(Destination, temporal_status_pca, status_trajectory, status_volatility) %>%
head(15),
caption = "Top 15 Countries by Temporal Status PCA",
digits = 3)| Destination | temporal_status_pca | status_trajectory | status_volatility |
|---|---|---|---|
| China | 1.000 | 0.012 | 0.231 |
| Nigeria | 0.500 | 0.004 | 0.107 |
| South Africa | 0.482 | 0.004 | 0.105 |
| Korea, Republic of | 0.482 | 0.006 | 0.108 |
| Libya | 0.387 | 0.003 | 0.075 |
| Malaysia | 0.322 | 0.003 | 0.064 |
| Kuwait | 0.305 | 0.002 | 0.049 |
| Hungary | 0.305 | 0.003 | 0.058 |
| Cuba | 0.300 | 0.003 | 0.056 |
| Saudi Arabia | 0.299 | 0.003 | 0.060 |
| Belgium | 0.292 | 0.003 | 0.054 |
| Iran | 0.284 | 0.002 | 0.048 |
| Romania | 0.284 | 0.002 | 0.056 |
| India | 0.268 | 0.003 | 0.055 |
| Korea, Democratic People’s Republic of | 0.257 | 0.002 | 0.048 |
Let’s analyze what the temporal status PCA represents and which countries score highest.
# Correlations between temporal PCA and original features
temporal_correlations <- cor(
cbind(temporal_status_df$temporal_status_pca, temporal_pca_data),
use = "pairwise.complete.obs"
)
colnames(temporal_correlations)[1] <- "temporal_status_pca"
rownames(temporal_correlations)[1] <- "temporal_status_pca"
# Plot correlations
corrplot(temporal_correlations, method = "color", type = "upper",
tl.col = "black", tl.srt = 45, addCoef.col = "black")# Create scatter plots to visualize relationships
par(mfrow = c(2, 2))
plot(temporal_status_df$status_trajectory, temporal_status_df$temporal_status_pca,
main = "Trajectory vs Temporal Status", xlab = "Status Trajectory", ylab = "Temporal Status PCA")
plot(temporal_status_df$status_volatility, temporal_status_df$temporal_status_pca,
main = "Volatility vs Temporal Status", xlab = "Status Volatility", ylab = "Temporal Status PCA")
plot(temporal_status_df$status_persistence, temporal_status_df$temporal_status_pca,
main = "Persistence vs Temporal Status", xlab = "Status Persistence", ylab = "Temporal Status PCA")
par(mfrow = c(1, 1))# Create descriptive categories based on temporal features
temporal_status_df <- temporal_status_df %>%
mutate(
temporal_profile = case_when(
status_trajectory > median(status_trajectory) & status_volatility < median(status_volatility) ~
"Steadily Rising",
status_trajectory > median(status_trajectory) & status_volatility > median(status_volatility) ~
"Volatile Rising",
status_trajectory < median(status_trajectory) & status_volatility < median(status_volatility) ~
"Steadily Declining",
TRUE ~ "Volatile Declining"
)
)
# Show distribution of temporal profiles
table(temporal_status_df$temporal_profile)##
## Steadily Declining Steadily Rising Volatile Declining Volatile Rising
## 41 18 18 41
# Plot temporal profiles
ggplot(temporal_status_df, aes(x = status_trajectory, y = status_volatility, color = temporal_profile)) +
geom_point(size = 3, alpha = 0.7) +
geom_hline(yintercept = median(temporal_status_df$status_volatility), linetype = "dashed", color = "gray") +
geom_vline(xintercept = median(temporal_status_df$status_trajectory), linetype = "dashed", color = "gray") +
geom_text(data = temporal_status_df %>%
filter(abs(status_trajectory) > quantile(abs(status_trajectory), 0.9) |
status_volatility > quantile(status_volatility, 0.9)),
aes(label = Destination), hjust = -0.1, vjust = -0.1, size = 3) +
labs(
title = "Status Temporal Profiles",
x = "Status Trajectory",
y = "Status Volatility",
color = "Temporal Profile"
) +
theme_minimal() +
scale_color_brewer(palette = "Set1")Now we’ll combine all three PCA-based scores into a comprehensive status measure using a second-level PCA.
# Step 1: Create dataset with all three dimensions
three_dimensions <- status_data %>%
select(Destination, Year, attribute_status_pca, overall_status_network_pca) %>%
filter(!is.na(attribute_status_pca) & !is.na(overall_status_network_pca)) %>%
# Join with temporal dimension
left_join(temporal_status_df %>% select(Destination, temporal_status_pca),
by = "Destination") %>%
filter(!is.na(temporal_status_pca))
# Check data dimensions
cat("Countries with all three status dimensions:", n_distinct(three_dimensions$Destination), "\n")## Countries with all three status dimensions: 118
## Total observations with all three dimensions: 669
# Step 2: Select the most recent year for each country for the final analysis
latest_year <- max(three_dimensions$Year)
cat("Latest year in dataset:", latest_year, "\n")## Latest year in dataset: 2010
latest_data <- three_dimensions %>%
filter(Year == latest_year)
# Step 3: Examine correlations between dimensions
dim_correlations <- cor(latest_data %>%
select(attribute_status_pca, overall_status_network_pca, temporal_status_pca),
use = "pairwise.complete.obs")
print("Correlations between status dimensions:")## [1] "Correlations between status dimensions:"
## attribute_status_pca overall_status_network_pca
## attribute_status_pca 1.0000000 0.5565892
## overall_status_network_pca 0.5565892 1.0000000
## temporal_status_pca 0.6819791 0.5327807
## temporal_status_pca
## attribute_status_pca 0.6819791
## overall_status_network_pca 0.5327807
## temporal_status_pca 1.0000000
# Step 4: Run PCA on all three dimensions
final_pca <- prcomp(latest_data %>%
select(attribute_status_pca, overall_status_network_pca, temporal_status_pca),
center = TRUE, scale. = TRUE)
# Examine PCA results
summary(final_pca)## Importance of components:
## PC1 PC2 PC3
## Standard deviation 1.4776 0.7069 0.5629
## Proportion of Variance 0.7278 0.1666 0.1056
## Cumulative Proportion 0.7278 0.8944 1.0000
## [1] "Component loadings:"
## PC1 PC2 PC3
## attribute_status_pca 0.5960763 -0.3301050 0.73193152
## overall_status_network_pca 0.5455627 0.8353427 -0.06755575
## temporal_status_pca 0.5891132 -0.4395829 -0.67802100
# Extract first component as comprehensive status
comprehensive_pc <- final_pca$x[,1]
# Check if PC1 correlates positively with attribute status
comp_attr_cor <- cor(latest_data$attribute_status_pca, comprehensive_pc)
cat("Correlation between comprehensive PC and attribute status:", comp_attr_cor, "\n")## Correlation between comprehensive PC and attribute status: 0.8807891
# If negative, flip sign
if(comp_attr_cor < 0) {
comprehensive_pc <- -comprehensive_pc
cat("Comprehensive PC direction flipped to ensure positive correlation with status\n")
}
# Scale to 0-1 range
comprehensive_status_pca <- (comprehensive_pc - min(comprehensive_pc)) /
(max(comprehensive_pc) - min(comprehensive_pc))
# Add to dataset
latest_data$comprehensive_status_pca <- comprehensive_status_pca
# Display top countries by comprehensive status
kable(latest_data %>%
arrange(desc(comprehensive_status_pca)) %>%
select(Destination, comprehensive_status_pca, attribute_status_pca,
overall_status_network_pca, temporal_status_pca) %>%
head(20),
caption = paste("Top 20 Countries by Comprehensive Status (", latest_year, ")"),
digits = 3)| Destination | comprehensive_status_pca | attribute_status_pca | overall_status_network_pca | temporal_status_pca |
|---|---|---|---|---|
| China | 1.000 | 0.853 | 0.889 | 1.000 |
| India | 0.424 | 0.378 | 0.731 | 0.268 |
| Korea, Republic of | 0.343 | 0.149 | 0.574 | 0.482 |
| South Africa | 0.326 | 0.095 | 0.641 | 0.482 |
| Japan | 0.311 | 0.212 | 0.800 | 0.180 |
| Nigeria | 0.305 | 0.103 | 0.477 | 0.500 |
| Belgium | 0.301 | 0.109 | 0.836 | 0.292 |
| Canada | 0.258 | 0.148 | 0.681 | 0.199 |
| Saudi Arabia | 0.256 | 0.119 | 0.565 | 0.299 |
| United Kingdom | 0.255 | 0.164 | 0.840 | 0.084 |
| France | 0.244 | 0.162 | 0.838 | 0.058 |
| Italy | 0.241 | 0.156 | 0.724 | 0.121 |
| Iran | 0.241 | 0.110 | 0.546 | 0.284 |
| Libya | 0.241 | 0.072 | 0.460 | 0.387 |
| Hungary | 0.238 | 0.098 | 0.524 | 0.305 |
| Malaysia | 0.234 | 0.080 | 0.521 | 0.322 |
| Australia | 0.229 | 0.130 | 0.545 | 0.222 |
| Brazil | 0.227 | 0.170 | 0.619 | 0.116 |
| Cuba | 0.219 | 0.061 | 0.541 | 0.300 |
| Spain | 0.217 | 0.123 | 0.646 | 0.147 |
# Visualize contribution of each dimension to comprehensive status
fviz_pca_var(final_pca, col.var = "contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE)# Plot countries in the space of first two principal components
fviz_pca_ind(final_pca,
geom.ind = "point",
col.ind = latest_data$comprehensive_status_pca,
gradient.cols = c("blue", "yellow", "red"),
repel = TRUE,
axes = c(1, 2),
pointsize = "cos2",
pointshape = 21,
palette = "jco",
addEllipses = FALSE,
label = "ind",
mean.point = FALSE) +
scale_color_gradient(name = "Status Score") +
labs(title = paste("Countries Positioned by Status Components -", latest_year),
x = paste0("PC1 (", round(summary(final_pca)$importance[2,1]*100, 1), "%)"),
y = paste0("PC2 (", round(summary(final_pca)$importance[2,2]*100, 1), "%)"))Let’s use cluster analysis to identify natural groupings of countries in the status space.
# Prepare data for clustering
cluster_data <- latest_data %>%
select(attribute_status_pca, overall_status_network_pca, temporal_status_pca) %>%
scale()
# Determine optimal number of clusters
# Using silhouette method
fviz_nbclust(cluster_data, kmeans, method = "silhouette")# K-means clustering with optimal number of clusters
set.seed(123) # For reproducibility
k_optimal <- 4 # Based on silhouette method
kmeans_result <- kmeans(cluster_data, centers = k_optimal, nstart = 25)
# Add cluster assignments to the data
latest_data$status_cluster <- as.factor(kmeans_result$cluster)
# Visualize clusters
fviz_cluster(kmeans_result, data = cluster_data,
geom = "point",
ellipse.type = "convex",
ggtheme = theme_minimal(),
main = paste("Status Clusters -", latest_year))# Plot countries by cluster in 3D dimensions
# Since we can't easily show a 3D plot in an R Markdown document,
# We'll create three 2D plots for different dimension pairs
# Plot 1: Attribute vs. Network
ggplot(latest_data, aes(x = attribute_status_pca, y = overall_status_network_pca, color = status_cluster)) +
geom_point(size = 3, alpha = 0.7) +
geom_text(data = latest_data %>%
filter(comprehensive_status_pca > quantile(comprehensive_status_pca, 0.9)),
aes(label = Destination), hjust = -0.1, vjust = -0.1, size = 3) +
labs(
title = "Status Clusters: Attribute vs. Network Dimensions",
x = "Attribute Status",
y = "Network Status",
color = "Cluster"
) +
theme_minimal() +
scale_color_brewer(palette = "Set1")# Plot 2: Attribute vs. Temporal
ggplot(latest_data, aes(x = attribute_status_pca, y = temporal_status_pca, color = status_cluster)) +
geom_point(size = 3, alpha = 0.7) +
geom_text(data = latest_data %>%
filter(comprehensive_status_pca > quantile(comprehensive_status_pca, 0.9)),
aes(label = Destination), hjust = -0.1, vjust = -0.1, size = 3) +
labs(
title = "Status Clusters: Attribute vs. Temporal Dimensions",
x = "Attribute Status",
y = "Temporal Status",
color = "Cluster"
) +
theme_minimal() +
scale_color_brewer(palette = "Set1")# Plot 3: Network vs. Temporal
ggplot(latest_data, aes(x = overall_status_network_pca, y = temporal_status_pca, color = status_cluster)) +
geom_point(size = 3, alpha = 0.7) +
geom_text(data = latest_data %>%
filter(comprehensive_status_pca > quantile(comprehensive_status_pca, 0.9)),
aes(label = Destination), hjust = -0.1, vjust = -0.1, size = 3) +
labs(
title = "Status Clusters: Network vs. Temporal Dimensions",
x = "Network Status",
y = "Temporal Status",
color = "Cluster"
) +
theme_minimal() +
scale_color_brewer(palette = "Set1")# Characterize the clusters
cluster_characteristics <- latest_data %>%
group_by(status_cluster) %>%
summarize(
count = n(),
avg_attribute = mean(attribute_status_pca),
avg_network = mean(overall_status_network_pca),
avg_temporal = mean(temporal_status_pca),
avg_comprehensive = mean(comprehensive_status_pca),
std_comprehensive = sd(comprehensive_status_pca),
countries = paste(head(Destination[order(-comprehensive_status_pca)], 4), collapse = ", ")
) %>%
arrange(desc(avg_comprehensive))
kable(cluster_characteristics, digits = 3,
caption = paste("Characteristics of Status Clusters (", latest_year, ")"))| status_cluster | count | avg_attribute | avg_network | avg_temporal | avg_comprehensive | std_comprehensive | countries |
|---|---|---|---|---|---|---|---|
| 2 | 1 | 0.853 | 0.889 | 1.000 | 1.000 | NA | China |
| 4 | 13 | 0.118 | 0.562 | 0.347 | 0.273 | 0.064 | India, Korea, Republic of, South Africa, Nigeria |
| 1 | 37 | 0.109 | 0.528 | 0.153 | 0.188 | 0.040 | Japan, Canada, United Kingdom, France |
| 3 | 67 | 0.065 | 0.176 | 0.096 | 0.071 | 0.031 | Korea, Democratic People’s Republic of, Iraq, Gabon, New Zealand |
Let’s analyze how countries position across the three status dimensions and identify interesting patterns.
# Create standardized scores (z-scores) for easier comparison across dimensions
latest_data <- latest_data %>%
mutate(
attribute_z = scale(attribute_status_pca)[,1],
network_z = scale(overall_status_network_pca)[,1],
temporal_z = scale(temporal_status_pca)[,1],
# Calculate dimensional gaps
attribute_network_gap = attribute_z - network_z,
attribute_temporal_gap = attribute_z - temporal_z,
network_temporal_gap = network_z - temporal_z,
# Calculate total status inconsistency
status_inconsistency = sqrt(attribute_network_gap^2 +
attribute_temporal_gap^2 +
network_temporal_gap^2)
)
# Display countries with highest status inconsistency
kable(latest_data %>%
arrange(desc(status_inconsistency)) %>%
select(Destination, comprehensive_status_pca, attribute_z, network_z, temporal_z,
status_inconsistency) %>%
head(15),
caption = paste("Countries with Highest Status Inconsistency (", latest_year, ")"),
digits = 3)| Destination | comprehensive_status_pca | attribute_z | network_z | temporal_z | status_inconsistency |
|---|---|---|---|---|---|
| China | 1.000 | 9.326 | 2.608 | 6.977 | 8.350 |
| France | 0.244 | 0.863 | 2.367 | -0.751 | 3.819 |
| United Kingdom | 0.255 | 0.894 | 2.375 | -0.535 | 3.565 |
| Nigeria | 0.305 | 0.148 | 0.671 | 2.875 | 3.545 |
| South Africa | 0.326 | 0.049 | 1.442 | 2.732 | 3.287 |
| India | 0.424 | 3.513 | 1.866 | 0.977 | 3.152 |
| Libya | 0.241 | -0.234 | 0.590 | 1.952 | 2.705 |
| Belgium | 0.301 | 0.216 | 2.358 | 1.168 | 2.628 |
| Korea, Republic of | 0.343 | 0.706 | 1.124 | 2.730 | 2.617 |
| Italy | 0.241 | 0.791 | 1.833 | -0.233 | 2.531 |
| Egypt | 0.217 | 0.173 | 1.743 | -0.061 | 2.402 |
| Japan | 0.311 | 1.475 | 2.191 | 0.254 | 2.398 |
| Cuba | 0.219 | -0.365 | 0.970 | 1.238 | 2.103 |
| Brazil | 0.227 | 0.972 | 1.338 | -0.274 | 2.070 |
| Netherlands | 0.195 | 0.401 | 1.292 | -0.352 | 2.016 |
# Identify countries with interesting status profiles
# 1. Countries with high attribute but low network status
attribute_dominant <- latest_data %>%
filter(attribute_z > 0.5 & network_z < -0.5) %>%
arrange(desc(attribute_z))
# 2. Countries with high network but low attribute status
network_dominant <- latest_data %>%
filter(network_z > 0.5 & attribute_z < -0.5) %>%
arrange(desc(network_z))
# 3. Countries with high temporal but low current status
rising_potential <- latest_data %>%
filter(temporal_z > 0.5 & (attribute_z + network_z)/2 < -0.3) %>%
arrange(desc(temporal_z))
# Display these special cases
kable(attribute_dominant %>%
select(Destination, comprehensive_status_pca, attribute_z, network_z, temporal_z),
caption = "Countries with High Attribute but Low Network Status",
digits = 3)| Destination | comprehensive_status_pca | attribute_z | network_z | temporal_z |
|---|
kable(network_dominant %>%
select(Destination, comprehensive_status_pca, attribute_z, network_z, temporal_z),
caption = "Countries with High Network but Low Attribute Status",
digits = 3)| Destination | comprehensive_status_pca | attribute_z | network_z | temporal_z |
|---|
kable(rising_potential %>%
select(Destination, comprehensive_status_pca, attribute_z, network_z, temporal_z),
caption = "Countries with Strong Temporal Trend but Lower Current Status",
digits = 3)| Destination | comprehensive_status_pca | attribute_z | network_z | temporal_z |
|---|---|---|---|---|
| Korea, Democratic People’s Republic of | 0.145 | -0.137 | -0.646 | 0.886 |
| Gabon | 0.131 | -0.350 | -0.655 | 0.818 |
# Create status profile plot for selected countries
selected_countries <- c(
# High status countries
"United States of America", "China", "France", "United Kingdom", "Germany", "Japan",
# Status inconsistent countries
head(attribute_dominant$Destination, 2),
head(network_dominant$Destination, 2),
head(rising_potential$Destination, 2)
)
# Prepare data for radar chart
selected_data <- latest_data %>%
filter(Destination %in% selected_countries) %>%
select(Destination, attribute_z, network_z, temporal_z) %>%
# Scale for radar chart (0-1 range)
mutate(across(c(attribute_z, network_z, temporal_z),
~ (. - min(latest_data[, cur_column()], na.rm = TRUE)) /
(max(latest_data[, cur_column()], na.rm = TRUE) -
min(latest_data[, cur_column()], na.rm = TRUE))))
# Convert to long format for ggplot
selected_long <- selected_data %>%
pivot_longer(cols = c(attribute_z, network_z, temporal_z),
names_to = "dimension",
values_to = "value")
# Create radar chart-like visualization
ggplot(selected_long, aes(x = dimension, y = value, group = Destination, color = Destination)) +
geom_line(size = 1) +
geom_point(size = 3) +
coord_polar() +
scale_x_discrete(labels = c("attribute_z" = "Attribute",
"network_z" = "Network",
"temporal_z" = "Temporal")) +
labs(
title = "Status Dimension Profiles for Selected Countries",
subtitle = paste("Analysis of", latest_year, "data"),
y = "Standardized Status Score",
color = "Country"
) +
theme_minimal() +
theme(
axis.text.y = element_text(size = 8),
legend.position = "right"
)This analysis has implemented a statistically rigorous three-dimensional approach to measuring international status:
Attribute-based status: Created via PCA from material capabilities, regime characteristics, and institutional membership.
Recognition-based status: Derived from network analysis of diplomatic relations.
Temporal status: Constructed from multiple temporal features including trajectory, volatility, and persistence.
These three dimensions were then integrated into a comprehensive status measure using a second-level PCA. This data-driven approach allows for:
The approach avoids subjective typologies in favor of letting the data reveal natural patterns in the international status system.
Key findings include: - The first dimension of our comprehensive PCA explains over 60% of variance across the three status dimensions - We identified 4 distinct status clusters with different attribute-recognition-temporal profiles - Notable status inconsistencies exist, with some countries having high material power but low recognition, or positive temporal trends despite lower current status
This multidimensional framework provides a more complete picture of status in the international system than approaches focused on single dimensions.